library(tidyverse)
library(magrittr)
library(scales)

# setwd("./replication")

### Load dataset
# Read the raw table from "data.csv" in the current working directory.
df <- read_csv("data.csv")

# Build the long-format table used by everything below:
#   1. drop every row that contains a missing value in any column;
#   2. reshape columns 4 through 24 so that each original cell becomes one row,
#      with the source column name in `example` and the cell in `value`.
# Columns are selected by position, so this depends on the column order of
# data.csv -- NOTE(review): confirm columns 4:24 are exactly the example columns.
# Rows come out ordered by original row rather than by example column; the
# summaries and figures in this script aggregate and do not rely on row order.
collected <- df %>%
  drop_na() %>%
  pivot_longer(
    cols = 4:24,
    names_to = "example",
    values_to = "value"
  )

### Relabel dataset
# Collapse the raw `example` names into display categories stored in `new_lab`.
# Any `example` value not listed below gets NA. The factor levels fix the
# category order (used, for instance, as the fill/legend order in the figures).
collected <- collected %>%
  mutate(
    new_lab = factor(
      case_when(
        example %in% "mobilization" ~ "Mobilization",
        example %in% c("arms_buildup", "military_expenditure") ~
          "Arms Buildups",
        example %in% c("sponsoring_military_events", "arms_transfer") ~
          "Arms Transfer",
        example %in% c("alliance", "military_coordination") ~ "Alliance",
        example %in% "military_demonstrations" ~ "Military Demonstrations",
        example %in% c("weapon_deployment", "tripwire_forces") ~
          "Tripwires and Deployments",
        example %in% "sanctions" ~ "Economic Sanctions",
        example %in% c(
          "treaty_implementation",
          "retrenchment",
          "econ_interdependence",
          "contrition",
          "political_visit",
          "nuclear_latency"
        ) ~ "Reassurance",
        example %in% c("cyber_operations", "threats") ~ "Others",
        example %in% c("fiction", "terrorism") ~
          "Suicide Terrorism and\nIndividual-Level Examples"
      ),
      levels = c(
        "Mobilization",
        "Arms Buildups",
        "Arms Transfer",
        "Alliance",
        "Military Demonstrations",
        "Tripwires and Deployments",
        "Economic Sanctions",
        "Reassurance",
        "Others",
        "Suicide Terrorism and\nIndividual-Level Examples"
      )
    )
  )

### Summary statistics
# Each bare expression below prints its value when the script is run
# interactively or with echo enabled.

## Total number of articles in sample space
nrow(df)

# Long-format rows whose value is positive. filter() also drops rows where the
# comparison is NA instead of returning all-NA rows as `[` would.
positive_examples <- collected %>%
  filter(value > 0)

## Total number of examples
# Counts rows with value > 0 (it does not sum `value`).
nrow(positive_examples)

## Number of unique articles with a sinking cost example
# Distinct combinations of the first two columns among the positive rows;
# presumably those two columns identify an article -- verify against data.csv.
n_articles_with_example <- positive_examples %>%
  select(1:2) %>%
  distinct() %>%
  nrow()
n_articles_with_example

# Share of all rows in `df` that have at least one positive example.
# NOTE(review): the denominator counts every row of `df`, including rows that
# were dropped for missing values before `collected` was built -- confirm this
# is the intended base.
(n_articles_with_example / nrow(df)) %>%
  percent(accuracy = 0.1)

### Figures

# Per-category, per-year totals of `value` (`count`) plus a running total of
# `count` within each category over time (`cum.fq`).
by_year <- collected %>%
  filter(year >= 1997) %>% # Eliminate empty space in front
  group_by(new_lab, year) %>%
  # Drop grouping explicitly rather than relying on summarise()'s implicit
  # "peel off the last group" behaviour (which also emits a message).
  summarise(count = sum(value), .groups = "drop") %>%
  # cumsum() is order-dependent: make the chronological order explicit.
  arrange(new_lab, year) %>%
  group_by(new_lab) %>%
  mutate(cum.fq = cumsum(count)) %>%
  # Return an ungrouped table so later verbs are not silently applied per group.
  ungroup()

### Frequency against Time
# Stacked bar chart: one bar per year, bar height is `count`, segments are
# coloured by example category. Grid lines are removed and the x-axis tick
# labels are rotated 45 degrees.
ggplot(data = by_year, mapping = aes(x = year, y = count, fill = new_lab)) +
  geom_col(position = "stack") +
  scale_x_continuous(breaks = c(1997, seq(from = 2000, to = 2020, by = 5))) +
  labs(x = "Year", y = "Frequency", fill = "Examples") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )

### Cumulative Frequency against Time
# Same layout as the frequency chart, but bar height is the per-category
# running total `cum.fq`; geom_col() stacks the categories by default.
ggplot(data = by_year, mapping = aes(x = year, y = cum.fq, fill = new_lab)) +
  geom_col() +
  scale_x_continuous(breaks = c(1997, seq(from = 2000, to = 2020, by = 5))) +
  labs(x = "Year", y = "Cumulative Frequency", fill = "Examples") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank()
  )
